# download_aps_pdfs.py
# PRPER (Physical Review Physics Education Research) Downloader
# Partially successful downloader for Physical Review Physics Education Research
# - Reads article PDF links from aps_pdf_links.txt (one URL per line, e.g. collected from an APS issue TOC)
# - Attempts to retrieve article PDFs
# - Handles APS-specific URL patterns
# - Requires manual intervention for restricted access articles


import requests
import os

OUTPUT_DIR = "APS_PRPER_PDFs"
LINKS_FILE = "aps_pdf_links.txt"
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server blocks the run forever
CHUNK_SIZE = 8192

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
    # "Cookie": "paste_your_cookie_string_here"  # <-- use this if needed
}


def filename_for_url(url):
    """Return the local PDF filename for a download URL.

    The name is the last non-empty path segment of ``url`` (query string and
    fragment removed) with ``.pdf`` appended. For a plain URL without a
    trailing slash this is the same name the script has always produced.

    Args:
        url: The article PDF URL as read from the links file.

    Returns:
        A filename such as ``"PhysRevPhysEducRes.19.020101.pdf"``;
        ``"download.pdf"`` when the URL has no usable path segment.
    """
    # Drop "#fragment" and "?query" so they never end up in a filename
    # ("?" is not even a legal filename character on Windows).
    bare = url.split("#", 1)[0].split("?", 1)[0]
    # Ignoring empty segments makes a trailing "/" harmless.
    segments = [segment for segment in bare.split("/") if segment]
    stem = segments[-1] if segments else "download"
    return stem + ".pdf"


def download_pdf(session, url, filepath):
    """Download ``url`` to ``filepath`` and report the outcome.

    Args:
        session: A ``requests.Session`` (or compatible object) used for the GET.
        url: The URL to fetch.
        filepath: Destination path of the PDF.

    Returns:
        ``None`` on success, otherwise a short human-readable failure reason.
        Network errors are reported through the return value instead of being
        raised, so one bad link does not abort the remaining downloads.
    """
    try:
        # The context manager releases the streamed connection on every path.
        with session.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            if response.status_code != 200:
                return f"status {response.status_code}"

            chunks = (
                chunk
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE)
                if chunk
            )
            first_chunk = next(chunks, b"")

            # A 200 response can still be an HTML login/challenge page; do not
            # store that under a .pdf name and call it a success.
            content_type = response.headers.get("Content-Type", "")
            looks_like_pdf = (
                "pdf" in content_type.lower()
                or first_chunk.lstrip().startswith(b"%PDF")
            )
            if not looks_like_pdf:
                return f"status 200 but not a PDF, content type {content_type!r}"

            # Write to a temporary name first so an interrupted transfer never
            # leaves a truncated file that looks like a finished PDF.
            partial_path = filepath + ".part"
            try:
                with open(partial_path, "wb") as out:
                    out.write(first_chunk)
                    for chunk in chunks:
                        out.write(chunk)
                os.replace(partial_path, filepath)
            finally:
                # Only still present if the transfer or the rename failed.
                if os.path.exists(partial_path):
                    os.remove(partial_path)
    except requests.RequestException as exc:
        return f"request error: {exc}"
    return None


def main():
    """Download every URL listed in ``LINKS_FILE`` into ``OUTPUT_DIR``."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with open(LINKS_FILE, "r", encoding="utf-8") as links_file:
        links = [line.strip() for line in links_file if line.strip()]

    saved = 0
    # One session reuses the connection to the server and carries the headers.
    with requests.Session() as session:
        session.headers.update(headers)
        for url in links:
            filename = filename_for_url(url)
            filepath = os.path.join(OUTPUT_DIR, filename)

            print(f"Downloading: {url}")
            failure = download_pdf(session, url, filepath)

            if failure is None:
                saved += 1
                print(f"✅ Saved: {filename}")
            else:
                print(f"⚠️ Failed or blocked: {url} ({failure})")

    print("\nAll done! PDFs are in APS_PRPER_PDFs folder.")
    print(f"Saved {saved} of {len(links)} file(s).")


if __name__ == "__main__":
    main()
